miR-375 KO (baseline)
Part2: scRNA-seq clustering
Sample description
This is the single cell RNAseq data of two 375KO mice and two WT mice (generated by 10X Genomics Chromium platform - 3’ V2 kit). Samples were jejunal crypts isolated from sex and age matched 375KO and WT mice (male; 12m old; chow diet condition).
Set 1 contained one 375KO and one WT mice processed in April 2019.
Set 2 contained one 375KO and one WT mice processed in October 2019.
Load required libraries
library(stringr)
library(Seurat, lib.loc = "/programs/R-4.0.5/library")
library(tidyverse)
library(openxlsx)
library(scDblFinder)
library(BiocParallel)
library(glmGamPoi)Load soupx output into Seurat objects
set.seed(1234)
proj_name <- "375KO-12m"
##Setup the Seurat objects
# Load the counts matricies into separate datasets
wt.data <- Read10X(data.dir = "/home/yah6/2022_04_375KO-12m_reanalysis/soupx_WT_filt/")
ko.data <- Read10X(data.dir = "/home/yah6/2022_04_375KO-12m_reanalysis/soupx_KO_filt/")
wt2.data <- Read10X(data.dir = "/home/yah6/2022_04_375KO-12m_reanalysis/soupx_WT2_filt/")
ko2.data <- Read10X(data.dir = "/home/yah6/2022_04_375KO-12m_reanalysis/soupx_KO2_filt/")
# check for duplicated barcodes(column names) within a dataset
any(duplicated(x = colnames(x = wt.data)))## [1] FALSE
any(duplicated(x = colnames(x = ko.data)))## [1] FALSE
any(duplicated(x = colnames(x = wt2.data)))## [1] FALSE
any(duplicated(x = colnames(x = ko2.data)))## [1] FALSE
# Rename columns to avoid trouble when merging data sets (since identical columns between KO and WT)
colnames(x = wt.data) <- paste('WT', colnames(x = wt.data), sep = '_')
colnames(x = ko.data) <- paste('375KO', colnames(x = ko.data), sep = '_')
colnames(x = wt2.data) <- paste('WT2', colnames(x = wt2.data), sep = '_')
colnames(x = ko2.data) <- paste('375KO2', colnames(x = ko2.data), sep = '_')
### Initialize the Seurat object with the raw, non-normalized data
# Keep all genes expressed in >= 3 cells.
# Keep all cells with at least 200 detected genes.
wt <- CreateSeuratObject(counts = wt.data,
min.cells = 3,
min.features = 200,
project = "WT")
ko <- CreateSeuratObject(counts = ko.data,
min.cells = 3,
min.features = 200,
project = "375KO")
wt2 <- CreateSeuratObject(counts = wt2.data,
min.cells = 3,
min.features = 200,
project = "WT2")
ko2 <- CreateSeuratObject(counts = ko2.data,
min.cells = 3,
min.features = 200,
project = "375KO2")
### add treatment/group status to objects
# add group
wt[["group"]] <- "WT"
ko[["group"]] <- "375KO"
wt2[["group"]] <- "WT"
ko2[["group"]] <- "375KO"
# add individual mouse ID
wt[["mouse"]] <- "WT-1"
ko[["mouse"]] <- "375KO-1"
wt2[["mouse"]] <- "WT-2"
ko2[["mouse"]] <- "375KO-2"
wt #4853## An object of class Seurat
## 16180 features across 4853 samples within 1 assay
## Active assay: RNA (16180 features, 0 variable features)
ko #5095## An object of class Seurat
## 15894 features across 5095 samples within 1 assay
## Active assay: RNA (15894 features, 0 variable features)
wt2 #1206## An object of class Seurat
## 14249 features across 1206 samples within 1 assay
## Active assay: RNA (14249 features, 0 variable features)
ko2 #1245## An object of class Seurat
## 14312 features across 1245 samples within 1 assay
## Active assay: RNA (14312 features, 0 variable features)
Doublet detection
Cell doublets were identified using scDblFinder, which uses functions from various other doublet detection methods, but has been rewritten to be more compute efficient. A comparison between other methods and scDblFinder can be found in Xi and Li. In this method, doublets are detected through the introduction of pseudo-doublets by combining two random cells. A nearest-neighbor approach is then employed to see which cells associate most closely with these pseudo-doublets. The percent of cells called as doublets is based on 10x genomics known doublet rate.
Notes: I had used the codes below to generate singlet flat file in April 2022. For rendering purpose, I set eval = F for the block and then load pre-existing data in the next code chunk.
set.seed(1234)
# Merge datasets into one single seurat object
alldata <- merge(wt, c(wt2, ko, ko2))
# Convert to single cell experiment
sce <- as.SingleCellExperiment(alldata)
# Run scDoubletFinder
bp <- MulticoreParam(3, RNGseed=1234) # this ensures reproducibility when multithreading
bpstart(bp)
sce <- scDblFinder(sce,
samples="group",
BPPARAM=bp)
bpstop(bp)
# If the code above doesn't work, unhash and try this.
# sce <- scDblFinder(sce,
# samples="sample",
# BPPARAM=MulticoreParam(3))
# Quick check of the results
table(sce$scDblFinder.class) # singlet 11697, doublet 702
# Write out metadata of doublet scores to flat file
sce@colData %>%
as.data.frame() %>%
rownames_to_column(var = "cell_id") %>%
dplyr::select(cell_id, contains("scDblFinder"))
write_csv(paste0(proj_name, "_scDbtFinder_metadata.csv"))
# Get cell ID that are defined as singlet
sce_sub <- subset(sce, ,scDblFinder.class == "singlet")
table(sce_sub$scDblFinder.class)
singlet_cell_id <- colnames(sce_sub)Doublet detection - load data
# Load doublet metadata sheet
mdata <- read.csv("/home/yah6/2022_04_375KO-12m_reanalysis/375KO-12m_scDbtFinder_metadata_R1.csv")
# Quick check
nrow(mdata) #12399## [1] 12399
nrow(filter(mdata, scDblFinder.class =="singlet")) #11697## [1] 11697
nrow(filter(mdata, scDblFinder.class =="doublet")) #702## [1] 702
# Get cell ID that are defined as singlet
singlet_cell_id <- as.vector(mdata$cell_id[which(mdata$scDblFinder.class =="singlet")])QC and filtering
Notes: nFeature_RNA is the number of unique genes detected per cell; nCount_RNA is the number of unique UMI molecules per cell; percentage.mt is the percentage of mitochondria genes per cell (indicating apoptotic cells).
Based on the resulting QC plots, I chose nFeature_RNA greater than 1500 & percent.mt smaller than 25% as the cutoff for the downstream analyses.
set.seed(1234)
# List all samples for looping function
obj.list <- list(wt,
wt2,
ko,
ko2)
# Remove cells classified as doublets by scDblFinder
for (i in 1:length(obj.list)) {
print(obj.list[[i]]@project.name)
print(paste0("Number of cells: ", ncol(obj.list[[i]])))
obj.list[[i]] <- obj.list[[i]][,intersect(colnames(obj.list[[i]]), singlet_cell_id)]
print(paste0("Number of cells: ", ncol(obj.list[[i]])))
print(" ")
}## [1] "WT"
## [1] "Number of cells: 4853"
## [1] "Number of cells: 4623"
## [1] " "
## [1] "WT2"
## [1] "Number of cells: 1206"
## [1] "Number of cells: 1118"
## [1] " "
## [1] "375KO"
## [1] "Number of cells: 5095"
## [1] "Number of cells: 4749"
## [1] " "
## [1] "375KO2"
## [1] "Number of cells: 1245"
## [1] "Number of cells: 1207"
## [1] " "
# Add % of reads mapping to mitochondrial genes
for (i in 1:length(obj.list)) {
obj.list[[i]] <- PercentageFeatureSet(obj.list[[i]], pattern = "^mt-", col.name = 'percent.mt')
obj.list[[i]][["per.mito.bin"]] <- ifelse(obj.list[[i]][["percent.mt"]] > 20, "> 20%", ifelse(obj.list[[i]][["percent.mt"]] > 10, "10%-20%", ifelse(obj.list[[i]][["percent.mt"]] > 2.5, "2.5%-10%", "< 2.5%")))
}
# Visualize QC metrics as a violin plot (hide individual data points)
plts <- lapply(obj.list, function(x) VlnPlot(x,
features = c("nFeature_RNA", "nCount_RNA", "percent.mt"),
group.by = "group",
pt.size = 0,
ncol = 3))
patchwork::wrap_plots(plts, ncol = 1)ggsave(paste0(proj_name, "_QC_no-pts.png"), units = "in", width = 8, height = 4 * length(obj.list), dpi = 300)
# Visualize QC metrics as a violin plot (show individual data points)
plts <- lapply(obj.list, function(x) VlnPlot(x,
features = c("nFeature_RNA", "nCount_RNA", "percent.mt"),
group.by = "group",
pt.size = 0.25,
ncol = 3))
patchwork::wrap_plots(plts, ncol = 1)ggsave(paste0(proj_name, "_QC.png"), units = "in", width = 8, height = 4 * length(obj.list), dpi = 300)
# Subset loop
for (i in 1:length(obj.list)) {
print(obj.list[[i]]@project.name)
print(paste0("Number of cells: ", ncol(obj.list[[i]])))
obj.list[[i]] <- subset(obj.list[[i]], subset = nFeature_RNA > 1500 & percent.mt < 25)
print(paste0("Number of cells: ", ncol(obj.list[[i]])))
print(" ")
}## [1] "WT"
## [1] "Number of cells: 4623"
## [1] "Number of cells: 1847"
## [1] " "
## [1] "WT2"
## [1] "Number of cells: 1118"
## [1] "Number of cells: 1090"
## [1] " "
## [1] "375KO"
## [1] "Number of cells: 4749"
## [1] "Number of cells: 2082"
## [1] " "
## [1] "375KO2"
## [1] "Number of cells: 1207"
## [1] "Number of cells: 1193"
## [1] " "
SC Transform
The step to get “normalized counts”. The latest version of
sctransform also supports using glmGamPoi package which substantially
improves the speed of the learning procedure. It can be invoked by
specifying method="glmGamPoi".
set.seed(1234)
obj.list <- lapply(X = obj.list, FUN = function(x) {
x <- SCTransform(x,
method = "glmGamPoi",
assay = "RNA",
new.assay.name = "SCT")
})##
|
| | 0%
|
|================== | 25%
|
|=================================== | 50%
|
|==================================================== | 75%
|
|======================================================================| 100%
##
|
| | 0%
|
|== | 3%
|
|===== | 7%
|
|======= | 10%
|
|========= | 13%
|
|============ | 17%
|
|============== | 20%
|
|================ | 23%
|
|=================== | 27%
|
|===================== | 30%
|
|======================= | 33%
|
|========================== | 37%
|
|============================ | 40%
|
|============================== | 43%
|
|================================= | 47%
|
|=================================== | 50%
|
|===================================== | 53%
|
|======================================== | 57%
|
|========================================== | 60%
|
|============================================ | 63%
|
|=============================================== | 67%
|
|================================================= | 70%
|
|=================================================== | 73%
|
|====================================================== | 77%
|
|======================================================== | 80%
|
|========================================================== | 83%
|
|============================================================= | 87%
|
|=============================================================== | 90%
|
|================================================================= | 93%
|
|==================================================================== | 97%
|
|======================================================================| 100%
##
|
| | 0%
|
|== | 3%
|
|===== | 7%
|
|======= | 10%
|
|========= | 13%
|
|============ | 17%
|
|============== | 20%
|
|================ | 23%
|
|=================== | 27%
|
|===================== | 30%
|
|======================= | 33%
|
|========================== | 37%
|
|============================ | 40%
|
|============================== | 43%
|
|================================= | 47%
|
|=================================== | 50%
|
|===================================== | 53%
|
|======================================== | 57%
|
|========================================== | 60%
|
|============================================ | 63%
|
|=============================================== | 67%
|
|================================================= | 70%
|
|=================================================== | 73%
|
|====================================================== | 77%
|
|======================================================== | 80%
|
|========================================================== | 83%
|
|============================================================= | 87%
|
|=============================================================== | 90%
|
|================================================================= | 93%
|
|==================================================================== | 97%
|
|======================================================================| 100%
##
|
| | 0%
|
|================== | 25%
|
|=================================== | 50%
|
|==================================================== | 75%
|
|======================================================================| 100%
##
|
| | 0%
|
|=== | 4%
|
|===== | 7%
|
|======== | 11%
|
|========== | 15%
|
|============= | 19%
|
|================ | 22%
|
|================== | 26%
|
|===================== | 30%
|
|======================= | 33%
|
|========================== | 37%
|
|============================= | 41%
|
|=============================== | 44%
|
|================================== | 48%
|
|==================================== | 52%
|
|======================================= | 56%
|
|========================================= | 59%
|
|============================================ | 63%
|
|=============================================== | 67%
|
|================================================= | 70%
|
|==================================================== | 74%
|
|====================================================== | 78%
|
|========================================================= | 81%
|
|============================================================ | 85%
|
|============================================================== | 89%
|
|================================================================= | 93%
|
|=================================================================== | 96%
|
|======================================================================| 100%
##
|
| | 0%
|
|=== | 4%
|
|===== | 7%
|
|======== | 11%
|
|========== | 15%
|
|============= | 19%
|
|================ | 22%
|
|================== | 26%
|
|===================== | 30%
|
|======================= | 33%
|
|========================== | 37%
|
|============================= | 41%
|
|=============================== | 44%
|
|================================== | 48%
|
|==================================== | 52%
|
|======================================= | 56%
|
|========================================= | 59%
|
|============================================ | 63%
|
|=============================================== | 67%
|
|================================================= | 70%
|
|==================================================== | 74%
|
|====================================================== | 78%
|
|========================================================= | 81%
|
|============================================================ | 85%
|
|============================================================== | 89%
|
|================================================================= | 93%
|
|=================================================================== | 96%
|
|======================================================================| 100%
##
|
| | 0%
|
|================== | 25%
|
|=================================== | 50%
|
|==================================================== | 75%
|
|======================================================================| 100%
##
|
| | 0%
|
|== | 3%
|
|===== | 7%
|
|======= | 10%
|
|========== | 14%
|
|============ | 17%
|
|============== | 21%
|
|================= | 24%
|
|=================== | 28%
|
|====================== | 31%
|
|======================== | 34%
|
|=========================== | 38%
|
|============================= | 41%
|
|=============================== | 45%
|
|================================== | 48%
|
|==================================== | 52%
|
|======================================= | 55%
|
|========================================= | 59%
|
|=========================================== | 62%
|
|============================================== | 66%
|
|================================================ | 69%
|
|=================================================== | 72%
|
|===================================================== | 76%
|
|======================================================== | 79%
|
|========================================================== | 83%
|
|============================================================ | 86%
|
|=============================================================== | 90%
|
|================================================================= | 93%
|
|==================================================================== | 97%
|
|======================================================================| 100%
##
|
| | 0%
|
|== | 3%
|
|===== | 7%
|
|======= | 10%
|
|========== | 14%
|
|============ | 17%
|
|============== | 21%
|
|================= | 24%
|
|=================== | 28%
|
|====================== | 31%
|
|======================== | 34%
|
|=========================== | 38%
|
|============================= | 41%
|
|=============================== | 45%
|
|================================== | 48%
|
|==================================== | 52%
|
|======================================= | 55%
|
|========================================= | 59%
|
|=========================================== | 62%
|
|============================================== | 66%
|
|================================================ | 69%
|
|=================================================== | 72%
|
|===================================================== | 76%
|
|======================================================== | 79%
|
|========================================================== | 83%
|
|============================================================ | 86%
|
|=============================================================== | 90%
|
|================================================================= | 93%
|
|==================================================================== | 97%
|
|======================================================================| 100%
##
|
| | 0%
|
|================== | 25%
|
|=================================== | 50%
|
|==================================================== | 75%
|
|======================================================================| 100%
##
|
| | 0%
|
|=== | 4%
|
|===== | 7%
|
|======== | 11%
|
|========== | 15%
|
|============= | 19%
|
|================ | 22%
|
|================== | 26%
|
|===================== | 30%
|
|======================= | 33%
|
|========================== | 37%
|
|============================= | 41%
|
|=============================== | 44%
|
|================================== | 48%
|
|==================================== | 52%
|
|======================================= | 56%
|
|========================================= | 59%
|
|============================================ | 63%
|
|=============================================== | 67%
|
|================================================= | 70%
|
|==================================================== | 74%
|
|====================================================== | 78%
|
|========================================================= | 81%
|
|============================================================ | 85%
|
|============================================================== | 89%
|
|================================================================= | 93%
|
|=================================================================== | 96%
|
|======================================================================| 100%
##
|
| | 0%
|
|=== | 4%
|
|===== | 7%
|
|======== | 11%
|
|========== | 15%
|
|============= | 19%
|
|================ | 22%
|
|================== | 26%
|
|===================== | 30%
|
|======================= | 33%
|
|========================== | 37%
|
|============================= | 41%
|
|=============================== | 44%
|
|================================== | 48%
|
|==================================== | 52%
|
|======================================= | 56%
|
|========================================= | 59%
|
|============================================ | 63%
|
|=============================================== | 67%
|
|================================================= | 70%
|
|==================================================== | 74%
|
|====================================================== | 78%
|
|========================================================= | 81%
|
|============================================================ | 85%
|
|============================================================== | 89%
|
|================================================================= | 93%
|
|=================================================================== | 96%
|
|======================================================================| 100%
features <- SelectIntegrationFeatures(object.list = obj.list, nfeatures = 3000)
obj.list <- PrepSCTIntegration(object.list = obj.list, anchor.features = features)
integration.anchors <- FindIntegrationAnchors(object.list = obj.list, normalization.method = "SCT", anchor.features = features)
seurat_obj <- IntegrateData(anchorset = integration.anchors, normalization.method = "SCT")Clustering and visualization
The Seurat SCT vignette states that 30 PCs should be used, as this
will increase performance. Defining number of cell clusters -
FindClusters function was performed to generate UMAPs with
different clustering resolution (from 0.1 to 1.5). I chose resolution =
0.7 as the proper clustering resolution.
This code chunk also write an RDS file that include data from only
“singlet” cells.
set.seed(1234)
# Run the standard workflow for visualization and clustering
seurat_obj <- RunPCA(seurat_obj)
seurat_obj <- RunUMAP(seurat_obj, reduction = "pca", dims = 1:30)## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
seurat_obj <- FindNeighbors(seurat_obj, reduction = "pca", dims = 1:30)
# Create a sub-folder to store all the resolution iterations for clustering
dir.create("res_iter")## Warning in dir.create("res_iter"): 'res_iter' already exists
for (i in seq(.1, 1.5, by = .1)) {
seurat_obj_tmp <- FindClusters(seurat_obj, resolution = i)
DimPlot(seurat_obj_tmp, reduction = "umap", label = T)
ggsave(paste0("res_iter/", proj_name, '_res-iter_', i, '_umap.png'), units = 'in', width = 6, height = 5, dpi = 250)
}## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 6212
## Number of edges: 221444
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9667
## Number of communities: 8
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 6212
## Number of edges: 221444
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9497
## Number of communities: 10
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 6212
## Number of edges: 221444
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9357
## Number of communities: 11
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 6212
## Number of edges: 221444
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9233
## Number of communities: 13
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 6212
## Number of edges: 221444
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9129
## Number of communities: 13
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 6212
## Number of edges: 221444
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9028
## Number of communities: 14
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 6212
## Number of edges: 221444
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8951
## Number of communities: 18
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 6212
## Number of edges: 221444
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8883
## Number of communities: 20
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 6212
## Number of edges: 221444
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8812
## Number of communities: 21
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 6212
## Number of edges: 221444
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8753
## Number of communities: 21
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 6212
## Number of edges: 221444
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8696
## Number of communities: 21
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 6212
## Number of edges: 221444
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8635
## Number of communities: 21
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 6212
## Number of edges: 221444
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8572
## Number of communities: 21
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 6212
## Number of edges: 221444
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8519
## Number of communities: 21
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 6212
## Number of edges: 221444
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8464
## Number of communities: 23
## Elapsed time: 0 seconds
# Choose a proper resolution level
seurat_obj <- FindClusters(seurat_obj, resolution = 0.7)## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 6212
## Number of edges: 221444
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8951
## Number of communities: 18
## Elapsed time: 0 seconds
# Set default assay to RNA
DefaultAssay(seurat_obj) <- "RNA"
seurat_obj <- NormalizeData(seurat_obj)
seurat_obj <- ScaleData(seurat_obj)
saveRDS(seurat_obj, paste0(proj_name, '_seurat-obj.rds'))UMAP plots
# UMAP by cell cluster ID
plt1 <- DimPlot(seurat_obj, reduction = "umap", label = T)
# UMAP by mouse groups (KO vs WT)
plt2 <- DimPlot(seurat_obj, reduction = "umap", group.by = "group")
plt1 | plt2ggsave(paste0(proj_name, '_umap.png'), units = 'in', width = 12, height = 5, dpi = 250)Overlain UMAP with other metadata info
# Cell cycling scores
seurat_obj <- CellCycleScoring(
seurat_obj,
s.features = str_to_title(cc.genes$s.genes),
g2m.features = str_to_title(cc.genes$g2m.genes),
assay = 'SCT',
set.ident = TRUE
)## Warning: The following features are not present in the object: Mlf1ip, not
## searching for symbol synonyms
Idents(seurat_obj) <- "seurat_clusters"
# Plot metadata
plt1 <- DimPlot(seurat_obj, reduction = "umap", label = T)
plt2 <- DimPlot(seurat_obj, reduction = "umap", group.by = "group")
plt3 <- DimPlot(seurat_obj, reduction = "umap", group.by = "per.mito.bin")
plt4 <- DimPlot(seurat_obj, reduction = "umap", group.by = "Phase")
(plt1 | plt2) / (plt3 | plt4 )ggsave(paste0(proj_name, '_umap_other-meta.png'), units = 'in', width = 12, height = 10, dpi = 250)Cell abundances
Simply determine the proportion of WT and KO cells for each of the cluster.
# Find cell #/% in each cluster and write output
count_per_cluster <- table(Idents(seurat_obj), seurat_obj@meta.data$group)
percent_per_cluster <- prop.table(table(Idents(seurat_obj), seurat_obj@meta.data$group), margin = 2)
count_per_cluster##
## 375KO WT
## 0 415 335
## 1 354 236
## 2 290 264
## 3 288 241
## 4 256 213
## 5 244 223
## 6 224 242
## 7 246 177
## 8 156 135
## 9 157 131
## 10 137 121
## 11 33 216
## 12 132 69
## 13 106 69
## 14 35 130
## 15 74 60
## 16 78 40
## 17 50 35
percent_per_cluster##
## 375KO WT
## 0 0.12671756 0.11406197
## 1 0.10809160 0.08035410
## 2 0.08854962 0.08988764
## 3 0.08793893 0.08205652
## 4 0.07816794 0.07252298
## 5 0.07450382 0.07592782
## 6 0.06839695 0.08239700
## 7 0.07511450 0.06026558
## 8 0.04763359 0.04596527
## 9 0.04793893 0.04460334
## 10 0.04183206 0.04119850
## 11 0.01007634 0.07354443
## 12 0.04030534 0.02349336
## 13 0.03236641 0.02349336
## 14 0.01068702 0.04426285
## 15 0.02259542 0.02042901
## 16 0.02381679 0.01361934
## 17 0.01526718 0.01191692
# plotting (% as for the entire dataset)
as.data.frame(percent_per_cluster) %>%
mutate(Percent = Freq * 100,
Cluster = Var1,
Condition = Var2) %>%
ggplot(., aes(Cluster, Percent, fill = Condition)) +
geom_bar(stat = 'identity', position = 'dodge') +
theme_classic() ggsave(paste0(proj_name, '_percent_cell_in_cluster.png'), width = 8, height = 6)
# plotting (% within a given cluster)
as.data.frame(prop.table(table(Idents(seurat_obj), seurat_obj@meta.data$group), margin = 1)) %>%
mutate(Percent = Freq * 100,
Cluster = Var1,
Condition = Var2) %>%
ggplot(., aes(Cluster, Percent, fill = Condition)) +
geom_bar(stat = 'identity') +
theme_classic() ggsave(paste0(proj_name, '_percent_sample_in_cluster.png'), width = 8, height = 6)
# write output
wb=createWorkbook()
ws=addWorksheet(wb,'Cells per cluster')
writeData(wb, ws, count_per_cluster, rowNames = F, colNames = T, startRow = 1,startCol = 1)
ws=addWorksheet(wb,'Percent per cluster')
writeData(wb, ws, percent_per_cluster, rowNames = F, colNames = T, startRow = 1,startCol = 1)
saveWorkbook(wb, paste0(proj_name, '_cells_per_cluster.xlsx'), overwrite = T)Session info
sessionInfo()## R version 4.0.5 (2021-03-31)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: CentOS Linux 7 (Core)
##
## Matrix products: default
## BLAS: /programs/R-4.0.5/lib/libRblas.so
## LAPACK: /programs/R-4.0.5/lib/libRlapack.so
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] glmGamPoi_1.2.0 BiocParallel_1.24.1 scDblFinder_1.4.0
## [4] openxlsx_4.2.5 forcats_0.5.1 dplyr_1.0.6
## [7] purrr_0.3.4 readr_2.1.2 tidyr_1.1.3
## [10] tibble_3.1.2 ggplot2_3.3.3 tidyverse_1.3.1
## [13] SeuratObject_4.0.3 Seurat_4.0.5 stringr_1.4.0
##
## loaded via a namespace (and not attached):
## [1] utf8_1.2.1 reticulate_1.20
## [3] tidyselect_1.1.1 htmlwidgets_1.5.3
## [5] grid_4.0.5 Rtsne_0.15
## [7] munsell_0.5.0 codetools_0.2-16
## [9] ica_1.0-2 statmod_1.4.36
## [11] scran_1.18.7 xgboost_1.4.1.1
## [13] future_1.21.0 miniUI_0.1.1.1
## [15] withr_2.5.0 colorspace_2.0-1
## [17] Biobase_2.50.0 highr_0.8
## [19] knitr_1.38 rstudioapi_0.13
## [21] stats4_4.0.5 SingleCellExperiment_1.12.0
## [23] ROCR_1.0-11 tensor_1.5
## [25] listenv_0.8.0 labeling_0.4.2
## [27] MatrixGenerics_1.2.1 GenomeInfoDbData_1.2.4
## [29] polyclip_1.10-0 farver_2.1.0
## [31] parallelly_1.25.0 vctrs_0.3.8
## [33] generics_0.1.0 xfun_0.30
## [35] R6_2.5.1 GenomeInfoDb_1.24.2
## [37] ggbeeswarm_0.6.0 rsvd_1.0.3
## [39] rmdformats_1.0.4 locfit_1.5-9.4
## [41] bitops_1.0-7 spatstat.utils_2.1-0
## [43] DelayedArray_0.16.3 assertthat_0.2.1
## [45] promises_1.2.0.1 scales_1.1.1
## [47] beeswarm_0.4.0 gtable_0.3.0
## [49] beachmat_2.6.4 globals_0.14.0
## [51] goftest_1.2-2 rlang_1.0.2
## [53] splines_4.0.5 lazyeval_0.2.2
## [55] spatstat.geom_2.1-0 broom_1.0.0
## [57] yaml_2.3.5 reshape2_1.4.4
## [59] abind_1.4-5 modelr_0.1.8
## [61] backports_1.4.1 httpuv_1.6.1
## [63] tools_4.0.5 bookdown_0.22
## [65] ellipsis_0.3.2 spatstat.core_2.1-2
## [67] jquerylib_0.1.4 RColorBrewer_1.1-2
## [69] BiocGenerics_0.36.1 ggridges_0.5.3
## [71] Rcpp_1.0.7 plyr_1.8.6
## [73] sparseMatrixStats_1.2.1 zlibbioc_1.36.0
## [75] RCurl_1.98-1.6 rpart_4.1-15
## [77] deldir_0.2-10 pbapply_1.4-3
## [79] viridis_0.6.2 cowplot_1.1.1
## [81] S4Vectors_0.28.1 zoo_1.8-9
## [83] SummarizedExperiment_1.18.2 haven_2.4.3
## [85] ggrepel_0.9.1 cluster_2.1.2
## [87] fs_1.5.2 magrittr_2.0.3
## [89] data.table_1.14.0 scattermore_0.7
## [91] lmtest_0.9-38 reprex_2.0.1
## [93] RANN_2.6.1 fitdistrplus_1.1-3
## [95] matrixStats_0.58.0 hms_1.1.1
## [97] patchwork_1.1.1 mime_0.10
## [99] evaluate_0.15 xtable_1.8-4
## [101] readxl_1.4.0 IRanges_2.22.2
## [103] gridExtra_2.3 compiler_4.0.5
## [105] scater_1.18.6 KernSmooth_2.23-20
## [107] crayon_1.5.1 htmltools_0.5.2
## [109] mgcv_1.8-35 later_1.2.0
## [111] tzdb_0.3.0 lubridate_1.8.0
## [113] DBI_1.1.2 dbplyr_2.1.1
## [115] MASS_7.3-54 Matrix_1.3-3
## [117] cli_3.3.0 parallel_4.0.5
## [119] igraph_1.2.6 GenomicRanges_1.40.0
## [121] pkgconfig_2.0.3 plotly_4.9.3
## [123] scuttle_1.0.4 spatstat.sparse_2.0-0
## [125] xml2_1.3.3 vipor_0.4.5
## [127] bslib_0.3.1 dqrng_0.2.1
## [129] XVector_0.30.0 rvest_1.0.2
## [131] digest_0.6.29 sctransform_0.3.2
## [133] RcppAnnoy_0.0.18 spatstat.data_2.1-0
## [135] rmarkdown_2.13 cellranger_1.1.0
## [137] leiden_0.3.7 edgeR_3.32.1
## [139] uwot_0.1.10 DelayedMatrixStats_1.12.3
## [141] shiny_1.6.0 lifecycle_1.0.0
## [143] nlme_3.1-152 jsonlite_1.8.0
## [145] BiocNeighbors_1.8.2 limma_3.44.3
## [147] viridisLite_0.4.0 fansi_0.4.2
## [149] pillar_1.6.1 lattice_0.20-44
## [151] fastmap_1.1.0 httr_1.4.2
## [153] survival_3.2-11 glue_1.4.2
## [155] zip_2.2.0 png_0.1-7
## [157] bluster_1.0.0 stringi_1.7.6
## [159] sass_0.4.1 BiocSingular_1.6.0
## [161] irlba_2.3.3 future.apply_1.7.0